%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% APPENDIX: IMPLEMENT PAIR CLUSTER BOOTSTRAP FOR IVQR TO ESTIMATE STANDARD ERRORS

% "DEFORESTATION IN THE AMAZON:
% A UNIFIED FRAMEWORK FOR ESTIMATION AND POLICY ANALYSIS"

% by Eduardo Souza-Rodrigues

% This version: November 2018

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% IMPLEMENT PAIR CLUSTER BOOTSTRAP

% Number of Bootstrap Replications
B = 500;

% Set seed
rng('default')

tic        
for farm = 1:4
    
    % I. Organize Data
    if farm == 1
    
        % Load the Data Set - Small Farms
        data1 = [data_folder,'\land_use_matlab_small.txt'];
        load(data1);
        data = land_use_matlab_small;
        clear data1 land_use_matlab_small;
    
    elseif farm == 2

        % Load the Data Set - Small-Medium Farms
        data1 = [data_folder,'\land_use_matlab_small_medium.txt'];
        load(data1);
        data = land_use_matlab_small_medium;
        clear data1 land_use_matlab_small_medium;
        
    elseif farm == 3

        % Load the Data Set - Medium-Large Farms
        data1 = [data_folder,'\land_use_matlab_medium_large.txt'];
        load(data1);
        data = land_use_matlab_medium_large;
        clear data1 land_use_matlab_medium_large;     
        
    elseif farm == 4

        % Load the Data Set - Large Farms
        data1 = [data_folder,'\land_use_matlab_large.txt'];
        load(data1);
        data = land_use_matlab_large;
        clear data1 land_use_matlab_large;
        
    end
    
    % Name of the variables
    Code        = data(:,1);      % Municipality Identifier (IBGE's code)
    Y           = data(:,2);      % Proportion of Deforestation
    Qa1         = data(:,3);      % Productivity Index - Only Crops
    Qa2         = data(:,4);      % Productivity Index - Crops and Pasture
    Qrice       = data(:,5);      % Yields Rice
    Qman        = data(:,6);      % Yields Manioc (Cassava)
    Qcorn       = data(:,7);      % Yields Corn
    Qsoy        = data(:,8);      % Yields Soy
    Qbean       = data(:,9);      % Yields Beans
    TC          = data(:,10);     % Transportation cost to the port
    Alt         = data(:,11);     % Altitude
    Temp        = data(:,12);     % Temperature
    Rain        = data(:,13);     % Rain
    Slope       = data(:,14);     % Average Slope
    Soil        = data(:,15:18);  % Proportion of soil quality = [prop_potenc2 prop_potenc3 prop_potenc4 prop_potenc5]
    Pop         = data(:,19);     % Population
    Mining      = data(:,20);     % Presence of Mining
    Powerplant  = data(:,21);     % Presence of Powerplant
    PPNeighbor  = data(:,22);     % Indicator for Powerplant Neighbor
    DistPA      = data(:,23);     % Straight Line Distance to nearest Protected Area
    Title       = data(:,24);     % Proportion of the Private Area with Land Title
    Fines2005   = data(:,25);     % Fines up to 2005
    Fines2003   = data(:,26);     % Fines up to 2003
    D           = data(:,27:28);  % Straight Line Distances = [dist to port, dist to state capital]
    Ibama       = data(:,29);     % Distance to the Nearest Ibama Agency
    CarbonDiff  = data(:,30);     % Difference between Forest Carbon Stock and Deforested Carbon Stock
    Area        = data(:,31);     % Area occupied by farms of the size considered
    G_IR        = data(:,32);     % Clusters, IBGE Immediate Regions
    G50         = data(:,33);     % Clusters, G = 50
    G75         = data(:,34);     % Clusters, G = 75
    G100        = data(:,35);     % Clusters, G = 100
    G125        = data(:,36);     % Clusters, G = 125   
    G150        = data(:,37);     % Clusters, G = 150
    Wpop        = data(:,38);     % Spatially Lagged Population
    Wmining     = data(:,39);     % Spatially Lagged Mining
    Wpowerplant = data(:,40);     % Spatially Lagged Powerplants
    WdistPA     = data(:,41);     % Spatially Lagged Proximity to Protected Areas
    
    % Define Geographic Clusters
    Cluster = [G_IR, G50, G150];
    %Cluster = [G_IR, G50, G75, G100, G125, G150];

    % Number of observations
    T  = size(Y,1);   
    
    % Truncate Y so that 0 < Y < 1
    Yt = Y + (myzero - Y).*(Y<myzero) + (1 - myzero - Y).*(Y>1-myzero);
    
    % Compute log odds ratio
    lnY = log(Yt./(1-Yt));
    
    % Spatially Lagged Regressors
    WX = [Wpop, Wmining, Wpowerplant, WdistPA];
    
    % Set of Covariates (Model Specification):
    if model == 0
                
        % Main Specification
        X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 Ibama WX];
        
    elseif model == 1
                
        % No Population
        X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Title Fines2005 Ibama WX];
       
    elseif model == 2
        
        % No land title proxy
        X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Fines2005 Ibama WX];
    
    elseif model == 3
        
        % No distance to ibama, nor fines
        X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title WX];
    
    elseif model == 4
        
        % No distance to ibama
        X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 WX];
    
    elseif model == 5
        
        % No Fines
        X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Ibama WX];
    
    elseif model == 6
        
        % No Spatially Lagged Regressors
        X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 Ibama];    
    
    end
    
    % Dimension of X
    dx = size(X,2);   
    
    % All Regressors -- Including TC and the constant term
    Xc  = [TC X ones(T,1)];
    dxc = size(Xc,2);           
    
    % Set of Instruments
    if d_iv == 1
        D = D(:,1); % only dist_port as IV
    end
    
    % Instrumental Variables
    Z  = [D X];
    dz = size(Z,2);    % Dimension of Z
            
    % All Instruments -- Including constant term
    Zc  = [Z ones(T,1)];
    dzc = size(Zc,2);   % Dimension of Zc
    
    % Vectors That Are Not Needed Anymore (Exogenous Regressors)
    clear Alt Temp Rain Slope Soil  Mining Powerplant Pop Title
    %clear G_IR G50 G75 G100 G125 G150        
    
    % II. Implement Pair Cluster Bootstrap
    
    % Fix Geographic Cluster
    for c = 1:size(Cluster,2)
        
        % Define Cluster
        gg = Cluster(:,c);
        
        % Unique Values for Cluster
        gid = unique(gg);
        
        % Number of Clusters
        G = length(gid);
            
        % Bootstrap Replications: Generate uniform random numbers over 1,...,G
        u = unidrnd(G,G,B); 
        
        % Bootstrap Replications: Draw Clusters Randomly
        gstar = gid(u);
        
        % Select Quantile
        for i=1:length(ts)
                       
            % IVQR Estimator
            beta_hat = inv_qr(lnY,TC,X,D,tss(i)/100);             
            
            % Loop over Bootstrap Samples and Clusters       
            for b=1:B            
                                
                % Store Data for each bootstrap sample
                yy = [];
                cc = [];
                xx = [];
                dd = [];
                
                for j=1:G
                    
                    % Draw All Bootstrap Observations from Cluster j
                    ytemp = lnY(gg==gstar(j,b));
                    ctemp = TC(gg==gstar(j,b));
                    xtemp = X(gg==gstar(j,b),:);
                    dtemp = D(gg==gstar(j,b),:);
                    
                    % Stack the observations by clusters
                    yy = [yy;ytemp];
                    cc = [cc;ctemp];
                    xx = [xx;xtemp];
                    dd = [dd;dtemp];
                    
                end
                
                % The bootstrap sample b
                lnYstar = yy;
                TCstar  = cc;
                Xstar   = xx; 
                Dstar   = dd; 
                
                % IVQR Estimator on Cluster Bootstrap Sample
                try
                    beta_star(:,b) = inv_qr(lnYstar,TCstar,Xstar,Dstar,tss(i)/100); 
                catch
                    %fprintf('Inconsistent data in iteration, skipped.\n', i);
                    continue;  % Jump to next iteration of: for i
                end
                                
            end
            
            % Bootstrap Standard Error (for quantile i)
            se_boot(i) = std(beta_star(1,:));
            %se_boot(m) = sqrt(1/(B-1) * sum( (mustar - mean(mustar)).^2));
            
            % Bootstrap t-statistic (for quantile i)
            t_boot(i) = beta_hat(1)/std(beta_star(1,:));
            clear beta_star
            
        end
        
        % Save All Quantile Results for Each Farm and Each Cluster
        
        % Farm 1
        if farm == 1 && c == 1
            
            % Save
            se_bootG1_farm1 = se_boot;
            t_bootG1_farm1  = t_boot;
            
        elseif farm == 1 && c == 2
            
            % Save
            se_bootG2_farm1 = se_boot;
            t_bootG2_farm1  = t_boot;
            
        elseif farm == 1 && c == 3
            
            % Save
            se_bootG3_farm1 = se_boot;
            t_bootG3_farm1  = t_boot;
            
        elseif farm == 1 && c == 4
            
            % Save
            se_bootG4_farm1 = se_boot;
            t_bootG4_farm1  = t_boot;
            
        elseif farm == 1 && c == 5
            
            % Save
            se_bootG5_farm1 = se_boot;
            t_bootG5_farm1  = t_boot;
            
        elseif farm == 1 && c == 6
            
            % Save
            se_bootG6_farm1 = se_boot;
            t_bootG6_farm1  = t_boot;
        
            
        % Farm 2
        elseif farm == 2 && c == 1
            
            % Save
            se_bootG1_farm2 = se_boot;
            t_bootG1_farm2  = t_boot;
            
        elseif farm == 2 && c == 2
            
            % Save
            se_bootG2_farm2 = se_boot;
            t_bootG2_farm2  = t_boot;
            
        elseif farm == 2 && c == 3
            
            % Save
            se_bootG3_farm2 = se_boot;
            t_bootG3_farm2  = t_boot;
            
        elseif farm == 2 && c == 4
            
            % Save
            se_bootG4_farm2 = se_boot;
            t_bootG4_farm2  = t_boot; 
            
        elseif farm == 2 && c == 5
            
            % Save
            se_bootG5_farm2 = se_boot;
            t_bootG5_farm2  = t_boot;
            
        elseif farm == 2 && c == 6
            
            % Save
            se_bootG6_farm2 = se_boot;
            t_bootG6_farm2  = t_boot;
            
            
        % Farm 3
        elseif farm == 3 && c == 1
            
            % Save
            se_bootG1_farm3 = se_boot;
            t_bootG1_farm3  = t_boot;
            
        elseif farm == 3 && c == 2
            
            % Save
            se_bootG2_farm3 = se_boot;
            t_bootG2_farm3  = t_boot;
            
        elseif farm == 3 && c == 3
            
            % Save
            se_bootG3_farm3 = se_boot;
            t_bootG3_farm3  = t_boot;
            
        elseif farm == 3 && c == 4
            
            % Save
            se_bootG4_farm3 = se_boot;
            t_bootG4_farm3  = t_boot;
            
        elseif farm == 3 && c == 5
            
            % Save
            se_bootG5_farm3 = se_boot;
            t_bootG5_farm3  = t_boot;
            
        elseif farm == 3 && c == 6
            
            % Save
            se_bootG6_farm3 = se_boot;
            t_bootG6_farm3  = t_boot;
            
                     
        % Farm 4               
        elseif farm == 4 && c == 1
            
            % Save
            se_bootG1_farm4 = se_boot;
            t_bootG1_farm4  = t_boot;
            
        elseif farm == 4 && c == 2
            
            % Save
            se_bootG2_farm4 = se_boot;
            t_bootG2_farm4  = t_boot;
            
        elseif farm == 4 && c == 3
            
            % Save
            se_bootG3_farm4 = se_boot;
            t_bootG3_farm4  = t_boot;
            
        elseif farm == 4 && c == 4
            
            % Save
            se_bootG4_farm4 = se_boot;
            t_bootG4_farm4  = t_boot;
            
        elseif farm == 4 && c == 5
            
            % Save
            se_bootG5_farm4 = se_boot;
            t_bootG5_farm4  = t_boot;
            
        elseif farm == 4 && c == 6
            
            % Save
            se_bootG6_farm4 = se_boot;
            t_bootG6_farm4  = t_boot;
                    
        end
        
    end

end
tElapsed_total = toc;
disp('----------------------------------------------------------------------------')
disp(' ')
disp('Time Elapsed for All Bootstraps (minutes)')
tElapsed_total/60
clear tElapsed_total ans

%% (B) SHOW RESULTS

% Show Results
disp('--------------------------------------------------------')
disp('--------------------------------------------------------')
disp('CLUSTER BOOSTRAP STANDARD ERRORS')
disp('SMALL FARMS')
table([se_bootG1_farm1(1);se_bootG2_farm1(1);se_bootG3_farm1(1)],...
    [se_bootG1_farm1(2);se_bootG2_farm1(2);se_bootG3_farm1(2)],...
    [se_bootG1_farm1(3);se_bootG2_farm1(3);se_bootG3_farm1(3)],...
    [se_bootG1_farm1(4);se_bootG2_farm1(4);se_bootG3_farm1(4)],...
    [se_bootG1_farm1(5);se_bootG2_farm1(5);se_bootG3_farm1(5)],...
    'VariableNames',{'Q10','Q25','Q50','Q75','Q90'},'RowNames',{'GIR','G50','G150'})
   
disp('SMALL-MEDIUM FARMS')
table([se_bootG1_farm2(1);se_bootG2_farm2(1);se_bootG3_farm2(1)],...
    [se_bootG1_farm2(2);se_bootG2_farm2(2);se_bootG3_farm2(2)],...
    [se_bootG1_farm2(3);se_bootG2_farm2(3);se_bootG3_farm2(3)],...
    [se_bootG1_farm2(4);se_bootG2_farm2(4);se_bootG3_farm2(4)],...
    [se_bootG1_farm2(5);se_bootG2_farm2(5);se_bootG3_farm2(5)],...
    'VariableNames',{'Q10','Q25','Q50','Q75','Q90'},'RowNames',{'GIR','G50','G150'})
 
disp('MEDIUM-LARGE FARMS')
table([se_bootG1_farm3(1);se_bootG2_farm3(1);se_bootG3_farm3(1)],...
    [se_bootG1_farm3(2);se_bootG2_farm3(2);se_bootG3_farm3(2)],...
    [se_bootG1_farm3(3);se_bootG2_farm3(3);se_bootG3_farm3(3)],...
    [se_bootG1_farm3(4);se_bootG2_farm3(4);se_bootG3_farm3(4)],...
    [se_bootG1_farm3(5);se_bootG2_farm3(5);se_bootG3_farm3(5)],...
    'VariableNames',{'Q10','Q25','Q50','Q75','Q90'},'RowNames',{'GIR','G50','G150'})
 
disp('LARGE FARMS')
table([se_bootG1_farm4(1);se_bootG2_farm4(1);se_bootG3_farm4(1)],...
    [se_bootG1_farm4(2);se_bootG2_farm4(2);se_bootG3_farm4(2)],...
    [se_bootG1_farm4(3);se_bootG2_farm4(3);se_bootG3_farm4(3)],...
    [se_bootG1_farm4(4);se_bootG2_farm4(4);se_bootG3_farm4(4)],...
    [se_bootG1_farm4(5);se_bootG2_farm4(5);se_bootG3_farm4(5)],...
    'VariableNames',{'Q10','Q25','Q50','Q75','Q90'},'RowNames',{'GIR','G50','G150'})
 
disp('--------------------------------------------------------')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

